
* Start from an empty Stata session.
clear all

* Run the whole script without pausing for --more-- prompts.
set more off

* Notes
*	- every switch value is the highest probability at which the evaluation option is still preferred

* All relative paths used below (data/...) are resolved against this directory.
cd "/Users/sandroambuehl/Dropbox/Demand for Information/DATA_ANALYSIS/"


*******************************************************************************************************************************************
*** Import all data						    ***********************************************************************************************
*******************************************************************************************************************************************




* Import the z-Tree "subjects" table of sessions 2 through 10.
* For every session the session number is stored in -session-, subject ids
* are made unique across sessions (100 * session + within-session id) and
* the result is written to data/session_<n>_raw.dta.

local session_no = 1
foreach ordinal in 2nd 3rd 4th 5th 6th 7th 8th 9th 10th {
	local ++session_no

	clear
	ztree2stata subjects using "data/`ordinal'_session_data_raw.xls", replace

	drop session
	gen session = `session_no'
	replace Subject = 100 * session + Subject
	save "data/session_`session_no'_raw.dta", replace
}

* Session 10 is still in memory after the loop; stack sessions 2-9 below it.
forvalues earlier = 2/9 {
	append using "data/session_`earlier'_raw.dta"
}


* Restrict the stacked raw data to the variables used further down.
keep session treatment Period Subject Group ///
	right_white left_total right_total left_black ///
	ball_drawn box_selected_1 box_selected_2 box1 box3 ///
	switch a b elic_box switch_black switch_white elic_color ///
	crt1 crt2 crt3 age gender race major yrs_in_college statistics_course Bayes ///
	intuition_valuations intuition_conditionals intuition_unconditionals ///
	conscious_valuation conscious_conditionals conscious_unconditionals ///
	outside_chance ///
	ball_drawn1 - ball_drawn12 X_chosen1-X_chosen12 Y_chosen1-Y_chosen12 lottery_chosen1-lottery_chosen12 ///
	lotr1 political_orientation religious_beliefs3 lotr2 lotr3 religious_beliefs1 ///
	lotr4 lotr5 religious_beliefs2 lotr6 ///
	superstition1 superstition2 superstition3 box_X_selected

* Lower-case names for the three z-Tree bookkeeping variables.
rename Subject subject
rename Group group
rename Period period

* Replace the imported a and b by ratios computed from the ball counts.
drop a b
gen a = left_black / left_total
gen b = right_white / right_total

gen precision = a + b
gen asymmetry = a - b

* Valuation: switch point moved by 1.5 and rescaled from percent to [0, 1].
gen v = (switch + 1.5) / 100

* Polynomial terms in (a, b) and a flag for a or b being exactly 1.
gen lin = a + b
gen sq = a^2 + b^2
gen interaction = a * b
gen boundary = (b == 1 | a == 1)

* Information-structure index: equals the period, except that for group 0 the
* period order is reversed (11 - period in treatments 2 and 3; 12 - period in
* treatments 2-4 of sessions 4 and 10).
gen info_struct = period
replace info_struct = 11 - period if group == 0 & inlist(treatment, 2, 3)
replace info_struct = 12 - period if group == 0 & inlist(treatment, 2, 3, 4) & inlist(session, 4, 10)



*******************************************************************************************************************************************
*** subject characteristics				    ***********************************************************************************************
*******************************************************************************************************************************************


* Build data/subject_characteristics.dta from the treatment-6 rows.
* Works on a preserved copy, so the main data are untouched afterwards.
preserve

* 1 = "I agree a lot"; 2 = "I agree a little"; 3 = "I disagree a little"; 4 = "I disagree a lot"
* Items 2, 4 and 5 of the lotr battery are mirrored (x -> 5 - x) before summing.
foreach item of varlist lotr2 lotr4 lotr5 {
	replace `item' = 5 - `item'
}

* Sum of the six items, then mirrored on the 6..24 range of the sum.
gen lotr = lotr1 + lotr2 + lotr3 + lotr4 + lotr5 + lotr6
replace lotr = 24 - lotr

* political_orientation
* 1 = "always republican"; 2 = "usually republican"; 3="about equally often republican as democrat"; 4="usually democrat"; 5="always democrat"

* Mirror all religious-belief and superstition items (x -> 5 - x).
foreach item of varlist religious_beliefs1 religious_beliefs2 religious_beliefs3 superstition1 superstition2 superstition3 {
	replace `item' = 5 - `item'
}

gen superstition_sum = superstition1 + superstition2 + superstition3

keep if treatment == 6
drop treatment
save "data/subject_characteristics.dta", replace

restore



*******************************************************************************************************************************************
*** data from part 1					    ***********************************************************************************************
*******************************************************************************************************************************************



* Build data/part1.dta: one row per subject summarising the treatment-1 rows.
preserve

keep if treatment == 1

* 1 when box1 equals the ball that was drawn, 0 otherwise.
gen pred_correct = (box1 == ball_drawn)

* Histogram of the periods in which pred_correct is 0, exported as PDF.
quietly histogram period if pred_correct == 0, bin(6) title("wrong predictions in part 1 by information structure")
graph export "data/errors_part1.pdf", replace

sort subject
by subject: egen all_pred_correct = min(pred_correct)
by subject: egen mean_pred_correct = mean(pred_correct)
by subject: egen num_b_part1 = sum(ball_drawn)

* ball<p>_part1: the subject's mean of ball_drawn over the rows of period p,
* copied to every row of that subject.
forvalues p = 1/6 {
	by subject: egen ball`p'_part1 = mean(cond(period == `p', ball_drawn, .))
}

collapse all_pred_correct mean_pred_correct ball1_part1 ball2_part1 ball3_part1 ball4_part1 ball5_part1 ball6_part1 num_b_part1, by(subject)

save "data/part1.dta", replace

restore







*******************************************************************************************************************************************
*** data from part 3					    ***********************************************************************************************
*******************************************************************************************************************************************




* Build data/part3.dta from the treatment-3 rows (conditional switch points).
preserve

keep if treatment == 3

* Conditional probabilities from the two switch points, using the same
* (switch + 1.5) / 100 transformation that defines v.
gen p11 = (switch_black + 1.5) / 100
gen p00 = (switch_white + 1.5) / 100

rename switch_black switch_cond_X_black
rename switch_white switch_cond_Y_white

* Variables that belong to other parts are not carried into the part-3 file.
drop left_black right_white left_total right_total ball_drawn box_selected_1 box_selected_2 box1 box3 switch elic_box elic_color v lin sq interaction boundary

save "data/part3.dta", replace

restore


*******************************************************************************************************************************************
*** data from part 4					    ***********************************************************************************************
*******************************************************************************************************************************************


* Build data/part4.dta from the treatment-4 rows (unconditional elicitation).
preserve

keep if treatment == 4

gen switch_uncond = switch_black
rename switch_black switch_uncond_black

* Elicited probability: switch point moved by 1.5, rescaled to [0, 1] and
* clamped to that interval. Missing values compare greater than any number
* in Stata, so the upper clamp must exclude them explicitly; otherwise a
* missing elicitation would silently be recoded to 1.
gen p1_elic = (switch_uncond + 1.5)/100
replace p1_elic = 1 if p1_elic > 1 & !missing(p1_elic)
replace p1_elic = 0 if p1_elic < 0

* p1 is the elicited value itself, or its complement when elic_color is 0.
gen p1 = p1_elic
replace p1 = 1 - p1_elic if elic_color == 0

* Deviation of the elicited value from (a + (1 - b)) / 2, or from
* (b + (1 - a)) / 2 when elic_color is 0.
gen dev_bayes_p1_elic = p1_elic - ((a + (1 - b)) / 2)
replace dev_bayes_p1_elic = p1_elic - ((b + (1 - a)) / 2) if elic_color == 0


* recode such that this is always the probability of seeing a black ball
* the switch is the highest prob such that evaluation option preferred, hence the discounting 3.

replace switch_uncond_black = (100 - switch_uncond_black) - 3 if elic_color == 0

// gen p1 = (switch_uncond_black + 1.5 ) / 100

drop switch switch_white left_black right_white left_total right_total ball_drawn box_selected_1 box_selected_2 box1 box3 elic_box v lin sq interaction boundary

* The part-4 group is kept under its own name so it can be compared with
* the group variable of the data it is merged into.
rename group group_part4

save "data/part4.dta", replace

restore



*******************************************************************************************************************************************
*** data from part 5					    ***********************************************************************************************
*******************************************************************************************************************************************

* Part 5: reshape the treatment-5 rows to one row per subject x period x round
* and compute the running ball count and Bayesian posterior per round.
preserve

keep if treatment == 5

keep subject period a b info_struct outside_chance ball_drawn1 - ball_drawn12 X_chosen1-X_chosen12 Y_chosen1-Y_chosen12 lottery_chosen1-lottery_chosen12 box_X_selected

* Part-5 periods are renumbered 21-24 and outside_chance is set per period.
replace period = period+20
replace outside_chance = 0.8 if inlist(period, 21, 23)
replace outside_chance = 0.9 if period == 22
replace outside_chance = 0.75 if period == 24

reshape long ball_drawn X_chosen Y_chosen lottery_chosen, i(subject period) j(round)

* Recode draws from 0/1 to -1/+1 so that they can be summed.
replace ball_drawn = -1 if ball_drawn == 0

rename X_chosen x_chosen
rename Y_chosen y_chosen
rename box_X_selected box_x_selected
gen hit = 1-lottery_chosen

sort subject period round

* Round 1 starts from posterior 0.5 and a count equal to the first draw.
gen b_post = 0.5 if round == 1
gen num_b = ball_drawn if round == 1

* Each later round updates the previous row (the data are sorted by round
* within subject and period): the count adds the new draw, the posterior is
* updated with a after a +1 draw and with 1 - a after a -1 draw.
forvalues r = 2/12 {
	replace num_b = num_b[_n-1] + ball_drawn if round == `r'
	replace b_post = (a*b_post[_n-1]) / (a*b_post[_n-1] + (1-a) * (1-b_post[_n-1])) if round == `r' & ball_drawn == 1
	replace b_post = ((1-a)*b_post[_n-1]) / ((1-a)*b_post[_n-1] + a * (1-b_post[_n-1])) if round == `r' & ball_drawn == -1
}

* --- Bayesian benchmark: first round in which the posterior leaves the open
* --- interval (1 - outside_chance, outside_chance).
* b_hitting_round first holds the round number of every qualifying round, is
* then reduced to the earliest one per subject-period (b_first) and finally
* recoded to an indicator (1 at that round, missing elsewhere).
gen b_hitting_round = round if 1 - (b_post < outside_chance & b_post > 1 - outside_chance)
sort subject period
by subject period: egen b_first = min(b_hitting_round)
replace b_hitting_round = . if b_hitting_round != b_first
replace b_hitting_round = 1 if b_hitting_round != .
* b_box_at_hit: 1 if the posterior exceeds 0.5 at that round, 0 otherwise,
* copied to all rows of the subject-period.
gen b_box_at_hit2 = (b_post > 0.5) if b_hitting_round == 1
sort subject period
by subject period: egen b_box_at_hit = min(b_box_at_hit2)
drop b_box_at_hit2

* One panel id per subject-period, with round as the time variable.
gen id = 10000 * subj + period

xtset id round
sort id



* which is the box they get sure for? 
* the right measure is: define posteriors as posteriors for the box they get sure for first, and compare this to what the bayesian posterior is for this box!


* get first hit
* first = earliest round with hit == 1 within id; hitting_round flags that
* round; box_at_hit is 1 if x_chosen and 0 if y_chosen in that round.
gen above = .
replace above = round if hit == 1
sort id
by id: egen first = min(above)
replace above = . if first != above
gen hitting_round = (round == above)
gen box_at_hit2 = 1 if x_chosen == 1 & hitting_round == 1
replace box_at_hit2 = 0 if y_chosen == 1 & hitting_round == 1
by id: egen box_at_hit = min(box_at_hit2)
drop box_at_hit2

* generate bayes posterior when hit
* NOTE(review): the second replace below is not restricted to the hitting
* round (the restriction survives only in the trailing comment), so every row
* with y_chosen == 1 gets 1 - b_post and the min() that follows runs over all
* of them - confirm that this is intended.
gen b_when_above = .
replace b_when_above = b_post if hitting_round == 1
replace b_when_above = 1 - b_post if y_chosen == 1 // if (b_post < 0.5) & hitting_round == 1
by id: egen b_post_when_hit = min(b_when_above)
* Log-odds versions and differences relative to outside_chance.
gen logit_b_post_when_hit = log(b_post_when_hit / (1 - b_post_when_hit))
gen logit_outside_chance = log( outside_chance / (1 - outside_chance))
gen diff_b_post_when_hit = b_post_when_hit - outside_chance
gen diff_logit_b_post_when_hit = logit_b_post_when_hit - logit_outside_chance




********************	dealing with missings					****************************************************************
* Subject-periods without any hit get first = 13, one more than the 12 rounds.
replace first = 13 if first == .



* Subject-level means, copied to every row of the subject.
sort subject
by subject: egen mean_first = mean(first)
by subject: egen b_mean_first = mean(b_first)
by subject: egen mean_b_post_when_hit = mean(b_post_when_hit)
by subject: egen mean_logit_b_post_when_hit = mean(logit_b_post_when_hit)

* Observed minus Bayesian first hitting round (row level and subject mean).
gen diff_hit_f = first - b_first
gen mean_diff_hit_f = mean_first - b_mean_first

* 1 if the observed and the Bayesian box at the hit coincide; missing when
* either of the two is missing.
gen hit_same_as_bayes = (box_at_hit == b_box_at_hit)

replace hit_same_as_bayes = . if box_at_hit == . | b_box_at_hit == .


* b_decision: +1 when the posterior is beyond the outside_chance threshold on
* the side that matches box_x_selected, -1 when it is beyond the threshold on
* the other side, 0 otherwise.
gen b_decision = 0
replace b_decision = 1 if b_post > outside_chance & box_x_selected == 1
replace b_decision = 1 if b_post < 1 - outside_chance & box_x_selected == 0
replace b_decision = -1 if b_post < 1 - outside_chance & box_x_selected == 1
replace b_decision = -1 if b_post > outside_chance & box_x_selected == 0

* decision: same coding for the observed choice (x_chosen / y_chosen).
gen decision = 0 
replace decision = 1 if x_chosen == 1 & box_x_selected == 1
replace decision = 1 if y_chosen == 1 & box_x_selected == 0
replace decision = -1 if y_chosen == 1 & box_x_selected == 1
replace decision = -1 if x_chosen == 1 & box_x_selected == 0

* NOTE(review): constant-zero variable with no visible use in this file; it is
* written to hitting.dta - confirm whether anything downstream needs it.
gen testest =0

* Round-level part-5 data.
save "data/hitting.dta", replace


* Indicator that the observed first hit is later than the Bayesian one.
* A missing difference (b_first missing) would satisfy "> 0" because Stata
* missing values compare greater than any number, so it is left missing
* explicitly, in line with how hit_same_as_bayes is handled.
gen frac_diff_hit_f = (diff_hit_f > 0) if !missing(diff_hit_f)

* One row per subject and period (means over the rounds).
collapse a b info_struct outside_chance first b_first mean_first b_mean_first diff_hit_f mean_diff_hit_f mean_b_post_when_hit frac_diff_hit_f hit_same_as_bayes box_at_hit b_box_at_hit mean_logit_b_post_when_hit diff_b_post_when_hit diff_logit_b_post_when_hit b_post_when_hit logit_b_post_when_hit, by(subject period)

* Part-5 information structures are numbered from 11 upwards.
replace info_struct = info_struct + 10


* Random split of the rows into halves 1 and 2: rows are ordered by a uniform
* draw within subject and numbered consecutively over the whole data set;
* the row number modulo 4 decides the half.
set seed 249233

gen rnd = runiform()
sort subject rnd
gen rndid = _n
replace rndid = mod(rndid, 4) + 1
gen rndhalf = cond(rndid <= 2, 1, 2)
sort subject period
drop rndid


save "data/part5.dta", replace



********************	dealing with missings					****************************************************************
* Subject-periods without a Bayesian hitting round do not enter the
* subject-level averages.
drop if b_first == .

collapse mean_diff_hit_f frac_diff_hit_f mean_b_post_when_hit mean_logit_b_post_when_hit hit_same_as_bayes mean_first diff_b_post_when_hit diff_logit_b_post_when_hit, by(subject)
save "data/part5_subj.dta", replace

restore



*******************************************************************************************************************************************
*** integrate all parts					    ***********************************************************************************************
*******************************************************************************************************************************************


 
* The treatment-2 rows form the backbone; all other parts are merged onto them.
keep if treatment == 2

* Questionnaire items and the part-3/part-4 elicitation variables are removed
* here; the files merged below bring them back.
* NOTE(review): a variable removed here that is present in more than one of
* the merged files (e.g. elic_color) is taken from the first file that
* carries it, since later merges do not overwrite existing variables -
* confirm that this is the intended source.
drop crt1 crt2 crt3 age gender race major yrs_in_college statistics_course Bayes ///
	intuition_valuations intuition_conditionals intuition_unconditionals ///
	conscious_valuation conscious_conditionals conscious_unconditionals ///
	religious_beliefs1 religious_beliefs2 religious_beliefs3 political_orientation ///
	superstition1 superstition2 superstition3 ///
	lotr1 lotr2 lotr3 lotr4 lotr5 lotr6 elic_color ///
	switch_black switch_white

* Subject-level files are matched on subject, period-level files on subject and period.
merge m:1 subject using "data/subject_characteristics.dta", nogenerate
merge m:1 subject using "data/part1.dta", nogenerate
merge 1:1 subject period using "data/part3.dta", nogenerate
merge 1:1 subject period using "data/part4.dta", nogenerate
merge m:1 subject using "data/part5_subj.dta", nogenerate

* The period-level part-5 rows are stacked below as additional observations.
append using "data/part5.dta"

drop treatment ball_drawn1 - ball_drawn12 X_chosen1-X_chosen12 Y_chosen1-Y_chosen12 lottery_chosen1-lottery_chosen12



* in sessions 1, 2 and 3, everybody was in group 1 in part 4 (i.e. was given the information structures in the same order)
* this part of the script fixes this.
*
* For every row whose group differs from group_part4, the part-4 variables
* (p1, elic_color, p1_elic, dev_bayes_p1_elic) of period i are replaced by
* those of period 11 - i of the same subject.

preserve

keep if group != group_part4

keep subject period p1 p1_elic dev_bayes_p1_elic elic_color

reshape wide p1 elic_color p1_elic dev_bayes_p1_elic, i(subject) j(period)

* First copy the mirrored period into scratch variables ...
forvalues i=1/10 {
	local j = 11 - `i'
	gen q`i' = p1`j'
	gen q1_elic`i' = p1_elic`j'
	gen dev_bayes_q1_elic`i' = dev_bayes_p1_elic`j'
	gen bd`i' = elic_color`j'
}
* ... then overwrite the originals, so that no value is read after it was changed.
forvalues i=1/10 {
	replace p1`i' = q`i'
	replace elic_color`i' = bd`i'
	replace p1_elic`i' = q1_elic`i'
	replace dev_bayes_p1_elic`i' = dev_bayes_q1_elic`i'
}

drop q* bd* dev_bayes_q*

reshape long p1 elic_color p1_elic dev_bayes_p1_elic, i(subject) j(period)

rename p1 p1_flipped
rename elic_color elic_color_flipped
rename p1_elic p1_elic_flipped
rename dev_bayes_p1_elic dev_bayes_p1_elic_flipped

sort subject period
save "group_correction.dta", replace

restore


* Bring the mirrored values back. The match is one-to-one on subject and
* period; merge 1:1 (the syntax used for the other merges in this file)
* stops with an error if that key is not unique on either side.
sort subject period
merge 1:1 subject period using "group_correction.dta", nogenerate

replace p1 = p1_flipped if group != group_part4
replace elic_color = elic_color_flipped if group != group_part4
replace p1_elic = p1_elic_flipped if group != group_part4
replace dev_bayes_p1_elic = dev_bayes_p1_elic_flipped if group != group_part4


drop p1_flipped elic_color_flipped p1_elic_flipped dev_bayes_p1_elic_flipped



* Bayesian benchmark values implied by a and b

rename Bayes knows_bayes_law

* Benchmark valuation, defined for periods up to 11.
gen b_v = (a+b) / 2 if period <= 11

* this is the highest lottery for which the prediction game should be preferred
gen bayes_v_switch = floor(100 * b_v/3)*3+1

* Benchmark conditional probabilities, defined for periods up to 10,
* and the corresponding switch points.
gen b_p11 = a / (a + 1 - b) if period <= 10
gen b_p00 = b / (b + 1 - a) if period <= 10

gen bayes_cond_X_black_switch = floor(100*b_p11/3)*3+1
gen bayes_cond_Y_white_switch = floor(100*b_p00/3)*3+1

* Benchmark unconditional probabilities.
gen b_p1 = (a + 1 - b) / 2
gen b_p0 = (b + 1 - a) / 2


// note: the variable switch is also the highest lottery for which the prediction game (evaluation option) is preferred.


drop box_selected_1 box_selected_2 ball_drawn elic_box switch_black switch_white

* Add the pilot data: rows without a session number become session 1, get
* subject ids on the 100 * session + id scale, and their information
* structure 5 is recoded to 12.
append using "data/Pilot_Data.dta"
replace session = 1 if session == .
replace subject = 100 * session + subject if session == 1
replace info_s = 12 if info_s == 5 & session == 1


*******************************************************************************************************************************************
*** flip all info_s to upper triangle		***********************************************************************************************
*******************************************************************************************************************************************


* Keep copies of the values as elicited, before any mirroring.
gen p11_unflipped = p11
gen p00_unflipped = p00
gen a_unflipped = a
gen b_unflipped = b

* Rows with negative asymmetry (a < b) are mirrored: the roles of the 11 and
* 00 quantities are exchanged, p1 and b_p1 are replaced by their complements,
* and a and b are swapped. All other rows keep their values.
gen p11_s = cond(asymmetry < 0, p00, p11)
gen p00_s = cond(asymmetry < 0, p11, p00)
gen p1_s = cond(asymmetry < 0, 1 - p1, p1)
gen b_p11_s = cond(asymmetry < 0, b_p00, b_p11)
gen b_p00_s = cond(asymmetry < 0, b_p11, b_p00)
gen b_p1_s = cond(asymmetry < 0, 1 - b_p1, b_p1)
gen a_s = cond(asymmetry < 0, b, a)
gen b_s = cond(asymmetry < 0, a, b)

* After mirroring, asymmetry is non-negative on every row.
replace asymmetry = abs(asymmetry)



// gen v_pred = p1 * p11 + (1-p1) * p00
// gen v_pred_s = p1_s * p11_s + (1-p1_s) * p00_s



* The mirrored versions take over the original names.
drop p11 p00 p1 b_p11 b_p00 b_p1 a b
foreach stem in p11 p00 p1 b_p11 b_p00 b_p1 a b {
	rename `stem'_s `stem'
}



* name information structures lexicographically (precision, asymmetry in increasing order)
* Old codes 2 4 9 6 1 8 7 5 10 3 become new codes 1..10, in that order;
* codes above 10 are kept unchanged.

gen info_s2 = .
local new_code = 0
foreach old_code in 2 4 9 6 1 8 7 5 10 3 {
	local ++new_code
	replace info_s2 = `new_code' if info_struct == `old_code'
}

replace info_s2 = info_struct if info_struct > 10

drop info_struct
rename info_s2 info_structure

* set to 0 or 1 the observations that got outside this interval by the 'midpoint' procedure.
* Missing values compare greater than any number in Stata, so the upper bound
* has to exclude them explicitly; without the guard every missing probability
* (for example on rows that never had a p11, p00 or p1) would be turned into 1.

foreach prob of varlist p00 p11 p1 p00_unflipped p11_unflipped {
	replace `prob' = 1 if `prob' > 1 & !missing(`prob')
	replace `prob' = 0 if `prob' < 0
}



*******************************************************************************************************************************************
*** generate variables for analysis		    ***********************************************************************************************
*******************************************************************************************************************************************




* half: 0 for periods 1-5, 1 for later periods.
gen half = (period > 5)

* One tagged row per subject and per subject-half.
sort subject half
egen tag_sub = tag(subject)
egen tag_sub_half = tag(subject half)

* Valuations implied by the elicited (p1, p11, p00) and by replacing the
* unconditional, the conditional, or all of them with the Bayesian values.
gen v_pred = p1 * p11 + (1-p1) * p00
gen v_pred_true_uncond = b_p1 * p11 + (1 - b_p1) * p00
gen v_pred_true_cond = p1 * b_p11 + (1 - p1) * b_p00
gen v_pred_true_all = b_p1 * b_p11 + (1 - b_p1) * b_p00

* Deviations from the Bayesian benchmark.
gen dev_bayes_v = v - v_pred_true_all
gen dev_bayes_p11 = p11 - b_p11
gen dev_bayes_p00 = p00 - b_p00
gen dev_bayes_cond_both = (dev_bayes_p11 + dev_bayes_p00) / 2

* Indicators for a non-negative deviation (information structures 1-10 only).
* A missing deviation would satisfy ">= 0" in Stata, so it is excluded and
* the indicator stays missing in that case.
gen num_dev_bayes_v = dev_bayes_v >= 0 if info_structure <= 10 & !missing(dev_bayes_v)
gen num_dev_bayes_p11 = dev_bayes_p11 >= 0 if info_structure <= 10 & !missing(dev_bayes_p11)
gen num_dev_bayes_p00 = dev_bayes_p00 >= 0 if info_structure <= 10 & !missing(dev_bayes_p00)
gen num_dev_bayes_cond_both = (num_dev_bayes_p11 + num_dev_bayes_p00) / 2

* Deviations from the median response within each information structure.
gen num_dev_med_v = .
gen num_dev_med_p11 = .
gen num_dev_med_p00 = .
gen dev_med_v = .
gen dev_med_p11 = .
gen dev_med_p00 = .

* Every measure is compared with its own median. The p11 and p00 medians
* were computed before but the v median was used in their place.
* Missing responses keep a missing indicator.
forvalues i = 1/10 {
	quietly summarize v if info_structure == `i', detail
	local med_v = r(p50)
	replace num_dev_med_v = (v > `med_v') if info_structure == `i' & !missing(v)
	replace dev_med_v = v - `med_v' if info_structure == `i'

	quietly summarize p11 if info_structure == `i', detail
	local med_p11 = r(p50)
	replace num_dev_med_p11 = (p11 > `med_p11') if info_structure == `i' & !missing(p11)
	replace dev_med_p11 = p11 - `med_p11' if info_structure == `i'

	quietly summarize p00 if info_structure == `i', detail
	local med_p00 = r(p50)
	replace num_dev_med_p00 = (p00 > `med_p00') if info_structure == `i' & !missing(p00)
	replace dev_med_p00 = p00 - `med_p00' if info_structure == `i'
}

gen num_dev_med_both = (num_dev_med_p11 + num_dev_med_p00) / 2
gen dev_med_both = dev_med_p11 + dev_med_p00

* Subject-level (and subject-by-half) summaries of the deviation measures;
* every egen result is copied to all rows of the subject.
sort subject half

* Fractions, means and medians of the deviations from the Bayesian benchmark.
by subject: egen frac_dev_bayes_p11 = mean(num_dev_bayes_p11)
by subject: egen frac_dev_bayes_p00 = mean(num_dev_bayes_p00)
by subject: egen frac_dev_bayes_cond_both = mean(num_dev_bayes_cond_both)
by subject: egen frac_dev_bayes_v = mean(num_dev_bayes_v)
by subject: egen avg_dev_bayes_v = mean(dev_bayes_v)
by subject: egen avg_dev_bayes_p11 = mean(dev_bayes_p11)
by subject: egen avg_dev_bayes_p00 = mean(dev_bayes_p00)
by subject: egen avg_dev_bayes_cond_both = mean(dev_bayes_cond_both)
by subject: egen med_dev_bayes_v = median(dev_bayes_v)
by subject: egen med_dev_bayes_p11 = median(dev_bayes_p11)
by subject: egen med_dev_bayes_p00 = median(dev_bayes_p00)
gen med_dev_bayes_cond_both = (med_dev_bayes_p11 + med_dev_bayes_p00) / 2
by subject: egen mean_dev_bayes_uncond = mean(dev_bayes_p1_elic)

* Standard deviations; the _2 versions use only rows with a response of at least 0.5.
by subject: egen sd_dev_bayes_v = sd(dev_bayes_v)
by subject: egen sd_dev_bayes_p11 = sd(dev_bayes_p11)
by subject: egen sd_dev_bayes_p00 = sd(dev_bayes_p00)
by subject: egen sd_dev_bayes_v_2 = sd(dev_bayes_v) if v >= 0.5
by subject: egen sd_dev_bayes_p11_2 = sd(dev_bayes_p11) if p11 >= 0.5
by subject: egen sd_dev_bayes_p00_2 = sd(dev_bayes_p00) if p00 >= 0.5

* Same summaries for the deviations from the per-structure medians.
by subject: egen frac_dev_med_v = mean(num_dev_med_v)
by subject: egen frac_dev_med_p11 = mean(num_dev_med_p11)
by subject: egen frac_dev_med_p00 = mean(num_dev_med_p00)
by subject: egen frac_dev_med_both = mean(num_dev_med_both)
by subject: egen avg_dev_med_v = mean(dev_med_v)
by subject: egen avg_dev_med_p11 = mean(dev_med_p11)
by subject: egen avg_dev_med_p00 = mean(dev_med_p00)
by subject: egen avg_dev_med_both = mean(dev_med_both)
by subject: egen sd_dev_med_v = sd(dev_med_v)
by subject: egen sd_dev_med_p11 = sd(dev_med_p11)
by subject: egen sd_dev_med_p00 = sd(dev_med_p00)

by subject half: egen sd_dev_med_v_by_half = sd(dev_med_v)
by subject half: egen sd_dev_med_p11_by_half = sd(dev_med_p11)
by subject half: egen sd_dev_med_p00_by_half = sd(dev_med_p00)

gen sd_dev_med_both = (sd_dev_med_p11 + sd_dev_med_p00) / 2
gen sd_dev_med_both_by_half = (sd_dev_med_p11_by_half + sd_dev_med_p00_by_half) / 2

* Overall scores: weight 2/3 on the combined conditional measure and 1/3 on
* the valuation measure. total_sd_dev had these two weights the other way
* round, unlike every other total_* score; it now follows the same rule.
gen total_frac_dev = (2/3)*frac_dev_med_both + (1/3)*frac_dev_med_v
gen total_avg_dev = (2/3)*avg_dev_med_both + (1/3)*avg_dev_med_v
gen total_sd_dev = (2/3)*sd_dev_med_both + (1/3)*sd_dev_med_v

gen total_frac_dev_bayes = (2/3)*frac_dev_bayes_cond_both + (1/3)*frac_dev_bayes_v
gen total_avg_dev_bayes = (2/3)*avg_dev_bayes_cond_both + (1/3)*avg_dev_bayes_v


* crt_score counts the items answered with 5, 5 and 47; crt_h flags a score of 3.
gen crt_score = (crt1 == 5) + (crt2 == 5) + (crt3 == 47)
gen crt_h = (crt_score == 3)



* tag subjects that sometimes report valuations below 50.

* Row-level flags: response below 0.5.
gen v_below_50_i = (v < 0.5)
gen v_pred_below_50_i = (v_pred < 0.5)
gen p11_below_50_i = (p11 < 0.5)
gen p00_below_50_i = (p00 < 0.5)
gen cond_below_50_i = max(p11_below_50_i, p00_below_50_i)

* Row-level flag: p00 below 0.96 in information structures 6, 8 and 10.
gen bound_below_100_i = (p00 < 0.96 & (info_structure == 6 | info_structure == 8 | info_structure == 10))

* Any of the flags; information structure 11 is exempt from all of them.
gen rogue_obs = max(v_below_50_i, p11_below_50_i, p00_below_50_i, bound_below_100_i)
replace rogue_obs = 0 if info_structure == 11
replace v_below_50_i = 0 if info_structure == 11
replace p11_below_50_i = 0 if info_structure == 11
replace p00_below_50_i = 0 if info_structure == 11
replace bound_below_100_i = 0 if info_structure == 11

* Subject-level "ever" flags and counts. The p11 versions are built from the
* p11 row flag; they used the p00 flag before, which made them copies of the
* p00 versions.
sort subject
by subject: egen v_below_50 = max(v_below_50_i)
by subject: egen v_pred_below_50 = max(v_pred_below_50_i)
by subject: egen p11_below_50 = max(p11_below_50_i)
by subject: egen p00_below_50 = max(p00_below_50_i)

by subject: egen v_below_50_sum = sum(v_below_50_i)
by subject: egen v_pred_below_50_sum = sum(v_pred_below_50_i)
by subject: egen p11_below_50_sum = sum(p11_below_50_i)
by subject: egen p00_below_50_sum = sum(p00_below_50_i)
by subject: egen bound_below_100_sum = sum(bound_below_100_i)


gen mistake_sum = v_below_50_sum + p11_below_50_sum + p00_below_50_sum

gen cond_below_50 = max(p11_below_50, p00_below_50)
*gen rogue = cond_below_50 == 1 | v_below_50 == 1
gen cond_below_50_sum = p11_below_50_sum + p00_below_50_sum
* A subject is rogue with three or more below-0.5 responses in total.
gen rogue = (mistake_sum >= 3)


**** generating vars for checking Bayesianism

* Row-level indicators for a response within a tolerance of the Bayesian
* value, and the share of such rows per subject. Two tolerances are used:
* 0.015 (no suffix) and 0.045 (suffix _loose).
sort subject
foreach tol in 0.015 0.045 {
	local sfx
	if `tol' == 0.045 local sfx "_loose"

	gen bayes_p11`sfx' = abs(p11 - b_p11) < `tol'
	gen bayes_p00`sfx' = abs(p00 - b_p00) < `tol'
	gen bayes_v`sfx' = abs(v - v_pred_true_all) < `tol'

	foreach measure in p11 p00 v {
		by subject: egen frac_bayes_`measure'`sfx' = mean(bayes_`measure'`sfx')
	}

	* Average share over the two conditional measures.
	gen frac_bayes`sfx' = (frac_bayes_p11`sfx' + frac_bayes_p00`sfx') / 2
}




****** logging variables

* Natural logs of the subject-level deviation scores
* (non-positive inputs yield missing values).
gen log_avg_dev_med_both = ln(avg_dev_med_both)
gen log_avg_dev_med_v = ln(avg_dev_med_v)
gen log_total_avg_dev_med = ln(total_avg_dev)
gen log_mean_diff_hit_f = ln(mean_diff_hit_f)

* Natural logs of the questionnaire scores.
gen log_superstition_sum = ln(superstition_sum)
gen log_lotr = ln(lotr)

* Log-odds of the response minus log-odds of the Bayesian value.
gen dev_logit_bayes_p11 = ln(p11 / (1-p11)) - ln(b_p11 / (1-b_p11))
gen dev_logit_bayes_p00 = ln(p00 / (1-p00)) - ln(b_p00 / (1-b_p00))

* Not used for information structures 6, 8 and 10.
replace dev_logit_bayes_p00 = . if inlist(info_structure, 6, 8, 10)



****** random selection of half the information structures



* Fix the seed so that the uniform draws below start from a known generator state.
set seed 249233

* rnd and rndhalf already exist at this point (they arrive with the appended
* part-5 rows); they are discarded and drawn afresh for the whole data set.
drop rnd rndhalf
gen rnd = runiform()
* Order rows randomly within subject, number them consecutively over the whole
* data set and turn the row number modulo 10 into half 1 or half 2.
* NOTE(review): the modulo is taken on the global row number, so the split is
* five/five within a subject only if every subject has exactly 10 rows here -
* confirm, since part-5 and pilot rows are part of the data at this point.
* NOTE(review): the draws depend on the row order in which rnd is generated -
* confirm that this order is reproducible across runs.
sort subj rnd
gen rndid = [_n]
replace rndid = rndid-10*floor(rndid/10) + 1
gen rndhalf = (rndid <= 5)
replace rndhalf = 2 if rndhalf == 0
sort subj info_struct
drop rndid

* Per-subject mean deviation within each random half (rndhalf 1 and 2),
* copied to every row of the subject. The data must be sorted by subject.
local targets v p11 p00 p1
local sources dev_bayes_v dev_logit_bayes_p11 dev_logit_bayes_p00 dev_bayes_p1_elic

forvalues k = 1/4 {
	local tgt : word `k' of `targets'
	local src : word `k' of `sources'

	by subject: egen `tgt'_first = mean(cond(rndhalf == 1, `src', .))
	by subject: egen `tgt'_second = mean(cond(rndhalf == 2, `src', .))
}

* Per-subject standard deviation of the raw responses.
foreach measure in v p11 p00 {
	by subject: egen sd_`measure' = sd(`measure')
}











**** labelling variables

* Letters a-j as value labels for information structures 1-10.
label define info_s_label ///
	1 "a" 2 "b" 3 "c" 4 "d" 5 "e" ///
	6 "f" 7 "g" 8 "h" 9 "i" 10 "j"
label values info_structure info_s_label

* Period-11 rows without a benchmark valuation get 0.8.
replace v_pred_true_all = 0.8 if v_pred_true_all == . & period == 11

* Rows with a negative valuation are removed.
drop if v < 0



******************************
***** final save *************
******************************

* Written with saveold so that the file stays readable by older Stata releases.
saveold "data/data_all.dta", replace






**** saving data in form suitable for 3d-kernel regressions

* Export for the 3d-kernel regressions: sessions 1, 4 and 10 are left out.
drop if inlist(session, 1, 4, 10)

* Deviations of the responses from the Bayesian values.
gen dev_p11 = p11 - b_p11
gen dev_p00 = p00 - b_p00
gen dev_p1 = p1 - b_p1

* Only these columns are exported, in this order.
order a b v dev_p11 dev_p00 dev_p1 p11 p00 p1 v_pred

keep a b v dev_p11 dev_p00 dev_p1 p11 p00 p1 v_pred

* Written relative to the working directory set at the top of the script,
* like every other output file, so that the project location is defined in
* one place only.
outsheet using "data/data_for_kernel_reg.csv", replace

